Data: https://umich.box.com/s/wlnkizxbvglqse1kck91jemrmywdhjxf
eda1 <- readRDS("~/storage/Github/scRNAseqBenchmark_Clustering/evaluation/res.EDA1.rds")
eda2 <- readRDS("~/storage/Github/scRNAseqBenchmark_Clustering/evaluation/res.EDA2.rds")
eda3 <- readRDS("~/storage/Github/scRNAseqBenchmark_Clustering/evaluation/res.EDA3.rds")
eda4 <- readRDS("~/storage/Github/scRNAseqBenchmark_Clustering/evaluation/res.EDA4.rds")
udb1 <- readRDS("~/storage/Github/scRNAseqBenchmark_Clustering/evaluation/res.UDB1.rds")
udb2 <- readRDS("~/storage/Github/scRNAseqBenchmark_Clustering/evaluation/res.UDB2.rds")
udb3 <- readRDS("~/storage/Github/scRNAseqBenchmark_Clustering/evaluation/res.UDB3.rds")
udb4 <- readRDS("~/storage/Github/scRNAseqBenchmark_Clustering/evaluation/res.UDB4.rds")
eda1.tru <- readRDS("~/storage/Github/scRNAseqBenchmark_Clustering/evaluation/tru.EDA1.rds")
eda2.tru <- readRDS("~/storage/Github/scRNAseqBenchmark_Clustering/evaluation/tru.EDA2.rds")
eda3.tru <- readRDS("~/storage/Github/scRNAseqBenchmark_Clustering/evaluation/tru.EDA3.rds")
eda4.tru <- readRDS("~/storage/Github/scRNAseqBenchmark_Clustering/evaluation/tru.EDA4.rds")
udb1.tru <- readRDS("~/storage/Github/scRNAseqBenchmark_Clustering/evaluation/tru.UDB1.rds")
udb2.tru <- readRDS("~/storage/Github/scRNAseqBenchmark_Clustering/evaluation/tru.UDB2.rds")
udb3.tru <- readRDS("~/storage/Github/scRNAseqBenchmark_Clustering/evaluation/tru.UDB3.rds")
udb4.tru <- readRDS("~/storage/Github/scRNAseqBenchmark_Clustering/evaluation/tru.UDB4.rds")
Merge the data
res <- c(eda1,eda2,eda3,eda4,udb1,udb2,udb3,udb4)
tru <- c(eda1.tru, eda2.tru, eda3.tru, eda4.tru, udb1.tru, udb2.tru, udb3.tru, udb4.tru)
saveRDS(res,file = "~/storage/Github/scRNAseqBenchmark_Clustering/evaluation/results.rds")
saveRDS(tru,file = "~/storage/Github/scRNAseqBenchmark_Clustering/evaluation/truth.rds")
res <- readRDS("~/storage/Github/scRNAseqBenchmark_Clustering/evaluation/results.rds")
tru <- readRDS("~/storage/Github/scRNAseqBenchmark_Clustering/evaluation/truth.rds")
##. 2. Calculate clustering comparison metrics. Calculate Adjusted rand index
library(mclust)
## Package 'mclust' version 5.4.2
## Type 'citation("mclust")' for citing this R package in publications.
adj.mat <- matrix(0, nrow = 1024, ncol = 225)
dats <- names(res)
for(i in c(1:length(dats))){
name = dats[i]
res.mat <- res[[name]]
tru.vec <- tru[[name]]
adj.mat[i,] = apply(res.mat,2,function(x) adjustedRandIndex(x, tru.vec))
}
colnames(adj.mat) <- colnames(res[[1]])
rownames(adj.mat) <- dats
fmeasure <- function(clu1,clu2,label=NULL){
#Label should be from clu1, exchange clu1 and clu2 when necessary
if(sum(!is.na(clu1))==0 || sum(!is.na(clu2))==0){
return(NA)
}
contab <- as.matrix(table(factor(clu1),factor(clu2)))
if(is.null(label)){
clus <- 1:length(unique(clu1))
}else{
clus <- c(label)
}
totf = 0;
for(j in clus){
maxf = 0;
for(i in 1:length(unique(clu2))){
pij = contab[j,i]/sum(contab[,i])
rij = contab[j,i]/sum(contab[j,])
fmea = 2*rij*pij/(rij+pij)
if(!is.na(fmea) & fmea > maxf){
maxf = fmea
}
}
totf = totf+maxf*sum(contab[j,])
}
totf = totf/sum(contab[clus,])
return(totf)
}
fm.mat <- matrix(0, nrow = 1024, ncol = 225)
dats <- names(res)
for(i in c(1:length(dats))){
name = dats[i]
res.mat <- res[[name]]
tru.vec <- tru[[name]]
fm.mat[i,] = apply(res.mat,2,function(x) fmeasure(x, tru.vec))
}
colnames(fm.mat) <- colnames(res[[1]])
rownames(fm.mat) <- dats
dats <- names(res)
adj.mat <- readRDS("~/storage/Github/scRNAseqBenchmark_Clustering/evaluation/adj.mat.rds")
fm.mat <- readRDS("~/storage/Github/scRNAseqBenchmark_Clustering/evaluation/fm.mat.rds")
##. 3. Overall analysis Extract Dataset features (Equl/Unequal distance, Equal/Unequal Size, etc)
cdist <- unlist(lapply(dats, function(x) strsplit(x,"_")[[1]][1]))
distype <- unlist(lapply(cdist, function(x) substr(x,1,3)))
csize <- unlist(lapply(dats, function(x) strsplit(x,"_")[[1]][2]))
libsize <- unlist(lapply(dats, function(x) strsplit(x,"_")[[1]][3]))
libscale <- unlist(lapply(dats, function(x) strsplit(x,"_")[[1]][4]))
dropout <- unlist(lapply(dats, function(x) strsplit(x,"_")[[1]][5]))
dat.features <- cbind(cdist,csize,libsize,libscale,dropout)
colnames(dat.features) <- c("Cluster_Distance", "Cluster_Size", "Library_Size", "Library_Scale", "Dropout")
rownames(dat.features) <- dats
PCA remove all zero columns and replace NA with 0
adj.mat.fil <- adj.mat[,apply(adj.mat,2,function(x) sum(is.na(x))!=1024)]
adj.mat.fil[is.na(adj.mat.fil)] = 0
adj.pca <- prcomp(adj.mat.fil)
adj.tsne <- Rtsne::Rtsne(adj.mat.fil)
adj.umap <- umap::umap(adj.mat.fil)
par(mfrow=c(2,3))
plot(adj.pca$x[,c(1:2)], col=as.factor(cdist), pch=16, main="Cluster Distance")
plot(adj.pca$x[,c(1:2)], col=as.factor(distype), pch=16, main="Equal/Unequal Distance")
plot(adj.pca$x[,c(1:2)], col=as.factor(csize), pch=16, main="Equal/Unequal Cluster Size")
plot(adj.pca$x[,c(1:2)], col=as.factor(libsize), pch=16, main="Library Size")
plot(adj.pca$x[,c(1:2)], col=as.factor(libscale), pch=16, main="Library Scale")
plot(adj.pca$x[,c(1:2)], col=as.factor(dropout), pch=16, main="Dropout rate")
plot(adj.pca$x[,c(1:2)], col=c("#9bc0ee","#70a5e7","#4489df","#236fce","#9bc0ee","#70a5e7","#4489df","#236fce")[as.factor(cdist)], pch=16, main="Cluster Distance")
plot(adj.pca$x[,c(1:2)], col=c("#c6c4c4","#7a7676")[as.factor(distype)], pch=16, main="Equal/Unequal Distance")
plot(adj.pca$x[,c(1:2)], col=c("#c0ee9b","#6fce23")[as.factor(csize)], pch=16, main="Equal/Unequal Cluster Size")
plot(adj.pca$x[,c(1:2)], col=c("#eec99b", "#e7b270","#df9a44","#ce8223")[as.factor(libsize)], pch=16, main="Library Size")
plot(adj.pca$x[,c(1:2)], col=c("#c99bee","#b270e7","#9a44df","#8223ce")[as.factor(libscale)], pch=16, main="Library Scale")
plot(adj.pca$x[,c(1:2)], col=c("#eea09b","#e77770","#df4d44","#ce2d23")[as.factor(dropout)], pch=16, main="Dropout rate")
par(mfrow=c(2,3))
plot(adj.tsne$Y[,c(1:2)], col=c("#9bc0ee","#70a5e7","#4489df","#236fce","#9bc0ee","#70a5e7","#4489df","#236fce")[as.factor(cdist)], pch=16, main="Cluster Distance")
plot(adj.tsne$Y[,c(1:2)], col=c("#c6c4c4","#7a7676")[as.factor(distype)], pch=16, main="Equal/Unequal cluster distance")
plot(adj.tsne$Y[,c(1:2)], col=c("#c0ee9b","#6fce23")[as.factor(csize)], pch=16, main="Equal/Unequal cluster size")
plot(adj.tsne$Y[,c(1:2)], col=c("#eec99b", "#e7b270","#df9a44","#ce8223")[as.factor(libsize)], pch=16, main="Library Size")
plot(adj.tsne$Y[,c(1:2)], col=c("#c99bee","#b270e7","#9a44df","#8223ce")[as.factor(libscale)], pch=16, main="Library Scale")
plot(adj.tsne$Y[,c(1:2)], col=c("#eea09b","#e77770","#df4d44","#ce2d23")[as.factor(dropout)], pch=16, main="Dropout rate")
par(mfrow=c(2,3))
plot(adj.umap$layout[,c(1:2)], col=c("#9bc0ee","#70a5e7","#4489df","#236fce","#9bc0ee","#70a5e7","#4489df","#236fce")[as.factor(cdist)], pch=16, main="Cluster distance")
plot(adj.umap$layout[,c(1:2)], col=c("#c6c4c4","#7a7676")[as.factor(distype)], pch=16,main="Equal/Unequal Cluster Distance")
plot(adj.umap$layout[,c(1:2)], col=c("#c0ee9b","#6fce23")[as.factor(csize)], pch=16, main="Equal/Unequal Cluster Size")
plot(adj.umap$layout[,c(1:2)], col=c("#eec99b", "#e7b270","#df9a44","#ce8223")[as.factor(libsize)], pch=16, main="Library Size")
plot(adj.umap$layout[,c(1:2)], col=c("#c99bee","#b270e7","#9a44df","#8223ce")[as.factor(libscale)], pch=16, main="Library Scale")
plot(adj.umap$layout[,c(1:2)], col=c("#eea09b","#e77770","#df4d44","#ce2d23")[as.factor(dropout)], pch=16, main="Dropout")
fm.mat.fil <- fm.mat[,apply(fm.mat,2,function(x) sum(is.na(x))!=1024)]
fm.mat.fil[is.na(fm.mat.fil)] = 0
fm.pca <- prcomp(fm.mat.fil)
fm.tsne <- Rtsne::Rtsne(fm.mat.fil)
fm.umap <- umap::umap(fm.mat.fil)
par(mfrow=c(2,3))
plot(fm.pca$x[,c(1:2)], col=c("#9bc0ee","#70a5e7","#4489df","#236fce","#9bc0ee","#70a5e7","#4489df","#236fce")[as.factor(cdist)], pch=16, main="Cluster distance")
plot(fm.pca$x[,c(1:2)], col=c("#c6c4c4","#7a7676")[as.factor(distype)], pch=16, main="Equal/Unequal Cluster Distance")
plot(fm.pca$x[,c(1:2)], col=c("#c0ee9b","#6fce23")[as.factor(csize)], pch=16, main="Equal/Unequal Cluster Size")
plot(fm.pca$x[,c(1:2)], col=c("#eec99b", "#e7b270","#df9a44","#ce8223")[as.factor(libsize)], pch=16, main="Library Size")
plot(fm.pca$x[,c(1:2)], col=c("#c99bee","#b270e7","#9a44df","#8223ce")[as.factor(libscale)], pch=16, main="Library Scale")
plot(fm.pca$x[,c(1:2)], col=c("#eea09b","#e77770","#df4d44","#ce2d23")[as.factor(dropout)], pch=16, main="Dropout")
par(mfrow=c(2,3))
plot(fm.tsne$Y[,c(1:2)], col=c("#9bc0ee","#70a5e7","#4489df","#236fce","#9bc0ee","#70a5e7","#4489df","#236fce")[as.factor(cdist)], pch=16, main="Cluster Distance")
plot(fm.tsne$Y[,c(1:2)], col=c("#c6c4c4","#7a7676")[as.factor(distype)], pch=16, main="Equal/Unequal Cluster Distance")
plot(fm.tsne$Y[,c(1:2)], col=c("#c0ee9b","#6fce23")[as.factor(csize)], pch=16, main="Equal/Unequal Cluster Size")
plot(fm.tsne$Y[,c(1:2)], col=c("#eec99b", "#e7b270","#df9a44","#ce8223")[as.factor(libsize)], pch=16, main="Library Size")
plot(fm.tsne$Y[,c(1:2)], col=c("#c99bee","#b270e7","#9a44df","#8223ce")[as.factor(libscale)], pch=16, main="Library Scale")
plot(fm.tsne$Y[,c(1:2)], col=c("#eea09b","#e77770","#df4d44","#ce2d23")[as.factor(dropout)], pch=16, main="Dropout")
par(mfrow=c(2,3))
plot(fm.umap$layout, col=c("#9bc0ee","#70a5e7","#4489df","#236fce","#9bc0ee","#70a5e7","#4489df","#236fce")[as.factor(cdist)], pch=16, main="Cluster Distance")
plot(fm.umap$layout, col=c("#c6c4c4","#7a7676")[as.factor(distype)], pch=16, main="Equal/Unequal Cluster Distance")
plot(fm.umap$layout, col=c("#c0ee9b","#6fce23")[as.factor(csize)], pch=16, main="Equal/Unequal Cluster Size")
plot(fm.umap$layout, col=c("#eec99b", "#e7b270","#df9a44","#ce8223")[as.factor(libsize)], pch=16, main="Library Size")
plot(fm.umap$layout, col=c("#c99bee","#b270e7","#9a44df","#8223ce")[as.factor(libscale)], pch=16, main="Library Scale")
plot(fm.umap$layout, col=c("#eea09b","#e77770","#df4d44","#ce2d23")[as.factor(dropout)], pch=16, main="Dropout")
library(ComplexHeatmap)
## Loading required package: grid
## ========================================
## ComplexHeatmap version 1.99.5
## Bioconductor page: http://bioconductor.org/packages/ComplexHeatmap/
## Github page: https://github.com/jokergoo/ComplexHeatmap
## Documentation: http://jokergoo.github.io/ComplexHeatmap-reference
##
## If you use it in published research, please cite:
## Gu, Z. Complex heatmaps reveal patterns and correlations in multidimensional
## genomic data. Bioinformatics 2016.
## ========================================
##
## This version is a major update of the package. The major new features are:
##
## 1. Support splitting heatmaps by columns.
## 2. Support concatenating heatmaps/annotations vertically.
## 3. Provide more types of heatmap annotations.
## 4. Support UpSet plot.
##
## Note this version is not 100% compatible with the older versions (< 1.99.0).
## Please check by `vignette('difference_to_old_versions', package = 'ComplexHeatmap')`.
##
## Above messages will be removed in the future.
library(circlize)
## ========================================
## circlize version 0.4.5
## CRAN page: https://cran.r-project.org/package=circlize
## Github page: https://github.com/jokergoo/circlize
## Documentation: http://jokergoo.github.io/circlize_book/book/
##
## If you use it in published research, please cite:
## Gu, Z. circlize implements and enhances circular visualization
## in R. Bioinformatics 2014.
## ========================================
meth.gene <-unlist(lapply(colnames(adj.mat.fil), function(x) strsplit(x,"_")[[1]][1]))
meth.algo <-unlist(lapply(colnames(adj.mat.fil), function(x) strsplit(x,"_")[[1]][2]))
meth.k <-unlist(lapply(colnames(adj.mat.fil), function(x) strsplit(x,"_")[[1]][3]))
meth.k[is.na(meth.k)]=4
###. All data + ARI
ha = HeatmapAnnotation(df = data.frame(Distance_Type=distype, dat.features),
col = list(Distance_Type=c("EDA"="#c6c4c4","UDB"="#7a7676"),
Cluster_Distance=c("EDA1"="#9bc0ee","EDA2"="#70a5e7","EDA3"="#4489df","EDA4"="#236fce","UDB1"="#9bc0ee","UDB2"="#70a5e7","UDB3"="#4489df","UDB4"="#236fce"),
Cluster_Size=c("ESA"="#c0ee9b","USB"="#6fce23"),
Library_Size=c("LS1"="#eec99b", "LS2"="#e7b270","LS3"="#df9a44","LS4"="#ce8223"),
Library_Scale= c("LC1"="#c99bee","LC2"="#b270e7","LC3"="#9a44df","LC4"="#8223ce"),
Dropout=c("DO1"="#eea09b","DO2"="#e77770","DO3"="#df4d44","DO4"="#ce2d23")))
hm = rowAnnotation(df = data.frame(Gene_Selection=meth.gene, K=meth.k),
col = list(Gene_Selection=c("HVG"="#23ce82", "HEG"="#5ae3a6", "HDG"="#9beec9"),
K=c("2"="#236fce","3"="#2e7cdc","4"="#5a97e3","5"="#70a5e7","6"="#9bc0ee")))
Heatmap(t(adj.mat.fil), name="ARI",cluster_rows = TRUE, cluster_columns = TRUE, top_annotation = ha,left_annotation = hm, show_row_names = FALSE, show_column_names = FALSE, col = colorRamp2(c(0, 0.5, 1), c("blue", "white", "red")))
orderby <- function(x, by){
return(x[order(unlist(lapply(x, function(y) strsplit(y,"_")[[1]][by])))])
}
###. All data + f-measure
ha = HeatmapAnnotation(df = data.frame(Distance_Type=distype, dat.features),
col = list(Distance_Type=c("EDA"="#c6c4c4","UDB"="#7a7676"),
Cluster_Distance=c("EDA1"="#9bc0ee","EDA2"="#70a5e7","EDA3"="#4489df","EDA4"="#236fce","UDB1"="#9bc0ee","UDB2"="#70a5e7","UDB3"="#4489df","UDB4"="#236fce"),
Cluster_Size=c("ESA"="#c0ee9b","USB"="#6fce23"),
Library_Size=c("LS1"="#eec99b", "LS2"="#e7b270","LS3"="#df9a44","LS4"="#ce8223"),
Library_Scale= c("LC1"="#c99bee","LC2"="#b270e7","LC3"="#9a44df","LC4"="#8223ce"),
Dropout=c("DO1"="#eea09b","DO2"="#e77770","DO3"="#df4d44","DO4"="#ce2d23")))
hm = rowAnnotation(df = data.frame(Gene_Selection=meth.gene, K=meth.k),
col = list(Gene_Selection=c("HVG"="#23ce82", "HEG"="#5ae3a6", "HDG"="#9beec9"),
K=c("2"="#236fce","3"="#2e7cdc","4"="#5a97e3","5"="#70a5e7","6"="#9bc0ee")))
Heatmap(t(fm.mat.fil), name="F-measure",cluster_rows = TRUE, cluster_columns = TRUE, top_annotation = ha, left_annotation = hm, show_row_names = FALSE, show_column_names = FALSE, col = colorRamp2(c(0, 0.5, 1), c("blue", "white", "red")))
####. Equal distance (512) + HVG + k=4 + ARI
ha = rowAnnotation(df = data.frame(dat.features[1:512,]),
col = list(Cluster_Distance=c("EDA1"="#9bc0ee","EDA2"="#70a5e7","EDA3"="#4489df","EDA4"="#236fce"),
Cluster_Size=c("ESA"="#c0ee9b","USB"="#6fce23"),
Library_Size=c("LS1"="#eec99b", "LS2"="#e7b270","LS3"="#df9a44","LS4"="#ce8223"),
Library_Scale= c("LC1"="#c99bee","LC2"="#b270e7","LC3"="#9a44df","LC4"="#8223ce"),
Dropout=c("DO1"="#eea09b","DO2"="#e77770","DO3"="#df4d44","DO4"="#ce2d23")))
Heatmap(adj.mat.fil[grepl('EDA',rownames(adj.mat.fil)),grepl("HVG",colnames(adj.mat.fil))][,c(1:15)], name="ARI",cluster_rows = FALSE, right_annotation = ha, show_row_names = FALSE, col = colorRamp2(c(0, 0.5, 1), c("blue", "white", "red")))
####. Equal distance (512) + HVG + k=4 + ARI + Order by Equal/Unequal cluster distance
adj.mat.fil.sub <- adj.mat.fil[grepl('EDA',rownames(adj.mat.fil)),grepl("HVG",colnames(adj.mat.fil))][,c(1:15)]
ha = rowAnnotation(df = data.frame(dat.features[1:512,][orderby(rownames(adj.mat.fil.sub),2),]),
col = list(Cluster_Distance=c("EDA1"="#9bc0ee","EDA2"="#70a5e7","EDA3"="#4489df","EDA4"="#236fce"),
Cluster_Size=c("ESA"="#c0ee9b","USB"="#6fce23"),
Library_Size=c("LS1"="#eec99b", "LS2"="#e7b270","LS3"="#df9a44","LS4"="#ce8223"),
Library_Scale= c("LC1"="#c99bee","LC2"="#b270e7","LC3"="#9a44df","LC4"="#8223ce"),
Dropout=c("DO1"="#eea09b","DO2"="#e77770","DO3"="#df4d44","DO4"="#ce2d23")))
Heatmap(adj.mat.fil.sub[orderby(rownames(adj.mat.fil.sub),2),], name="ARI",cluster_rows = FALSE, right_annotation = ha, show_row_names = FALSE, col = colorRamp2(c(0, 0.5, 1), c("blue", "white", "red")))
####. Equal distance (512) + HVG + k=4 + ARI + Order by Library size
adj.mat.fil.sub <- adj.mat.fil[grepl('EDA',rownames(adj.mat.fil)),grepl("HVG",colnames(adj.mat.fil))][,c(1:15)]
ha = rowAnnotation(df = data.frame(dat.features[1:512,][orderby(rownames(adj.mat.fil.sub),3),]),
col = list(Cluster_Distance=c("EDA1"="#9bc0ee","EDA2"="#70a5e7","EDA3"="#4489df","EDA4"="#236fce"),
Cluster_Size=c("ESA"="#c0ee9b","USB"="#6fce23"),
Library_Size=c("LS1"="#eec99b", "LS2"="#e7b270","LS3"="#df9a44","LS4"="#ce8223"),
Library_Scale= c("LC1"="#c99bee","LC2"="#b270e7","LC3"="#9a44df","LC4"="#8223ce"),
Dropout=c("DO1"="#eea09b","DO2"="#e77770","DO3"="#df4d44","DO4"="#ce2d23")))
Heatmap(adj.mat.fil.sub[orderby(rownames(adj.mat.fil.sub),3),], name="ARI",cluster_rows = FALSE, right_annotation = ha, show_row_names = FALSE, col = colorRamp2(c(0, 0.5, 1), c("blue", "white", "red")))
####. Equal distance (512) + HVG + k=4 + ARI + Order by Library scale
adj.mat.fil.sub <- adj.mat.fil[grepl('EDA',rownames(adj.mat.fil)),grepl("HVG",colnames(adj.mat.fil))][,c(1:15)]
ha = rowAnnotation(df = data.frame(dat.features[1:512,][orderby(rownames(adj.mat.fil.sub),4),]),
col = list(Cluster_Distance=c("EDA1"="#9bc0ee","EDA2"="#70a5e7","EDA3"="#4489df","EDA4"="#236fce"),
Cluster_Size=c("ESA"="#c0ee9b","USB"="#6fce23"),
Library_Size=c("LS1"="#eec99b", "LS2"="#e7b270","LS3"="#df9a44","LS4"="#ce8223"),
Library_Scale= c("LC1"="#c99bee","LC2"="#b270e7","LC3"="#9a44df","LC4"="#8223ce"),
Dropout=c("DO1"="#eea09b","DO2"="#e77770","DO3"="#df4d44","DO4"="#ce2d23")))
Heatmap(adj.mat.fil.sub[orderby(rownames(adj.mat.fil.sub),4),], name="ARI",cluster_rows = FALSE, right_annotation = ha, show_row_names = FALSE, col = colorRamp2(c(0, 0.5, 1), c("blue", "white", "red")))
####. Equal distance (512) + HVG + k=4 + ARI + Order by Dropout
adj.mat.fil.sub <- adj.mat.fil[grepl('EDA',rownames(adj.mat.fil)),grepl("HVG",colnames(adj.mat.fil))][,c(1:15)]
ha = rowAnnotation(df = data.frame(dat.features[1:512,][orderby(rownames(adj.mat.fil.sub),5),]),
col = list(Cluster_Distance=c("EDA1"="#9bc0ee","EDA2"="#70a5e7","EDA3"="#4489df","EDA4"="#236fce"),
Cluster_Size=c("ESA"="#c0ee9b","USB"="#6fce23"),
Library_Size=c("LS1"="#eec99b", "LS2"="#e7b270","LS3"="#df9a44","LS4"="#ce8223"),
Library_Scale= c("LC1"="#c99bee","LC2"="#b270e7","LC3"="#9a44df","LC4"="#8223ce"),
Dropout=c("DO1"="#eea09b","DO2"="#e77770","DO3"="#df4d44","DO4"="#ce2d23")))
Heatmap(adj.mat.fil.sub[orderby(rownames(adj.mat.fil.sub),5),], name="ARI",cluster_rows = FALSE, right_annotation = ha, show_row_names = FALSE, col = colorRamp2(c(0, 0.5, 1), c("blue", "white", "red")))
####. Equal distance (512) + HVG + k=4 + F-measure
ha = rowAnnotation(df = data.frame(dat.features[1:512,]),
col = list(Cluster_Distance=c("EDA1"="#9bc0ee","EDA2"="#70a5e7","EDA3"="#4489df","EDA4"="#236fce"),
Cluster_Size=c("ESA"="#c0ee9b","USB"="#6fce23"),
Library_Size=c("LS1"="#eec99b", "LS2"="#e7b270","LS3"="#df9a44","LS4"="#ce8223"),
Library_Scale= c("LC1"="#c99bee","LC2"="#b270e7","LC3"="#9a44df","LC4"="#8223ce"),
Dropout=c("DO1"="#eea09b","DO2"="#e77770","DO3"="#df4d44","DO4"="#ce2d23")))
Heatmap(fm.mat.fil[grepl('EDA',rownames(adj.mat.fil)),grepl("HVG",colnames(adj.mat.fil))][,c(1:15)], name="F-measure",cluster_rows = FALSE, right_annotation = ha, show_row_names = FALSE, col = colorRamp2(c(0, 0.5, 1), c("blue", "white", "red")))
####. Equal distance (512) + HEG + k=4 + ARI
ha = rowAnnotation(df = data.frame(dat.features[1:512,]),
col = list(Cluster_Distance=c("EDA1"="#9bc0ee","EDA2"="#70a5e7","EDA3"="#4489df","EDA4"="#236fce"),
Cluster_Size=c("ESA"="#c0ee9b","USB"="#6fce23"),
Library_Size=c("LS1"="#eec99b", "LS2"="#e7b270","LS3"="#df9a44","LS4"="#ce8223"),
Library_Scale= c("LC1"="#c99bee","LC2"="#b270e7","LC3"="#9a44df","LC4"="#8223ce"),
Dropout=c("DO1"="#eea09b","DO2"="#e77770","DO3"="#df4d44","DO4"="#ce2d23")))
Heatmap(adj.mat.fil[grepl('EDA',rownames(adj.mat.fil)),grepl("HEG",colnames(adj.mat.fil))][,c(1:15)], name="ARI",cluster_rows = FALSE, right_annotation = ha, show_row_names = FALSE, col = colorRamp2(c(0, 0.5, 1), c("blue", "white", "red")))
####. Equal distance (512) + HEG + k=4 + F-measure
ha = rowAnnotation(df = data.frame(dat.features[1:512,]),
col = list(Cluster_Distance=c("EDA1"="#9bc0ee","EDA2"="#70a5e7","EDA3"="#4489df","EDA4"="#236fce"),
Cluster_Size=c("ESA"="#c0ee9b","USB"="#6fce23"),
Library_Size=c("LS1"="#eec99b", "LS2"="#e7b270","LS3"="#df9a44","LS4"="#ce8223"),
Library_Scale= c("LC1"="#c99bee","LC2"="#b270e7","LC3"="#9a44df","LC4"="#8223ce"),
Dropout=c("DO1"="#eea09b","DO2"="#e77770","DO3"="#df4d44","DO4"="#ce2d23")))
Heatmap(fm.mat.fil[grepl('EDA',rownames(fm.mat.fil)),grepl("HEG",colnames(fm.mat.fil))][,c(1:15)], name="F-measure",cluster_rows = FALSE, right_annotation = ha, show_row_names = FALSE, col = colorRamp2(c(0, 0.5, 1), c("blue", "white", "red")))
####. Equal distance (512) + HDG + k=4 + ARI
ha = rowAnnotation(df = data.frame(dat.features[1:512,]),
col = list(Cluster_Distance=c("EDA1"="#9bc0ee","EDA2"="#70a5e7","EDA3"="#4489df","EDA4"="#236fce"),
Cluster_Size=c("ESA"="#c0ee9b","USB"="#6fce23"),
Library_Size=c("LS1"="#eec99b", "LS2"="#e7b270","LS3"="#df9a44","LS4"="#ce8223"),
Library_Scale= c("LC1"="#c99bee","LC2"="#b270e7","LC3"="#9a44df","LC4"="#8223ce"),
Dropout=c("DO1"="#eea09b","DO2"="#e77770","DO3"="#df4d44","DO4"="#ce2d23")))
Heatmap(adj.mat.fil[grepl('EDA',rownames(adj.mat.fil)),grepl("HDG",colnames(adj.mat.fil))][,c(1:15)], name="ARI",cluster_rows = FALSE, right_annotation = ha, show_row_names = FALSE, col = colorRamp2(c(0, 0.5, 1), c("blue", "white", "red")))
####. Equal distance (512) + HDG + k=4 + F-measure
ha = rowAnnotation(df = data.frame(dat.features[1:512,]),
col = list(Cluster_Distance=c("EDA1"="#9bc0ee","EDA2"="#70a5e7","EDA3"="#4489df","EDA4"="#236fce"),
Cluster_Size=c("ESA"="#c0ee9b","USB"="#6fce23"),
Library_Size=c("LS1"="#eec99b", "LS2"="#e7b270","LS3"="#df9a44","LS4"="#ce8223"),
Library_Scale= c("LC1"="#c99bee","LC2"="#b270e7","LC3"="#9a44df","LC4"="#8223ce"),
Dropout=c("DO1"="#eea09b","DO2"="#e77770","DO3"="#df4d44","DO4"="#ce2d23")))
Heatmap(fm.mat.fil[grepl('EDA',rownames(fm.mat.fil)),grepl("HDG",colnames(fm.mat.fil))][,c(1:15)], name="F-measure",cluster_rows = FALSE, right_annotation = ha, show_row_names = FALSE, col = colorRamp2(c(0, 0.5, 1), c("blue", "white", "red")))
####.Unequal distance + HVG + k=4 + ARI
ha = rowAnnotation(df = data.frame(dat.features[513:1024,]),
col = list(Cluster_Distance=c("UDB1"="#9bc0ee","UDB2"="#70a5e7","UDB3"="#4489df","UDB4"="#236fce"),
Cluster_Size=c("ESA"="#c0ee9b","USB"="#6fce23"),
Library_Size=c("LS1"="#eec99b", "LS2"="#e7b270","LS3"="#df9a44","LS4"="#ce8223"),
Library_Scale= c("LC1"="#c99bee","LC2"="#b270e7","LC3"="#9a44df","LC4"="#8223ce"),
Dropout=c("DO1"="#eea09b","DO2"="#e77770","DO3"="#df4d44","DO4"="#ce2d23")))
Heatmap(adj.mat.fil[grepl('UDB',rownames(adj.mat.fil)),grepl("HVG",colnames(adj.mat.fil))][,c(1:15)], name="ARI",cluster_rows = FALSE, right_annotation = ha, show_row_names = FALSE, col = colorRamp2(c(0, 0.5, 1), c("blue", "white", "red")))
####.Unequal distance + HVG + k=4 + F-measure
ha = rowAnnotation(df = data.frame(dat.features[513:1024,]),
col = list(Cluster_Distance=c("UDB1"="#9bc0ee","UDB2"="#70a5e7","UDB3"="#4489df","UDB4"="#236fce"),
Cluster_Size=c("ESA"="#c0ee9b","USB"="#6fce23"),
Library_Size=c("LS1"="#eec99b", "LS2"="#e7b270","LS3"="#df9a44","LS4"="#ce8223"),
Library_Scale= c("LC1"="#c99bee","LC2"="#b270e7","LC3"="#9a44df","LC4"="#8223ce"),
Dropout=c("DO1"="#eea09b","DO2"="#e77770","DO3"="#df4d44","DO4"="#ce2d23")))
Heatmap(fm.mat.fil[grepl('UDB',rownames(fm.mat.fil)),grepl("HVG",colnames(fm.mat.fil))][,c(1:15)], name="F-measure",cluster_rows = FALSE, right_annotation = ha, show_row_names = FALSE, col = colorRamp2(c(0, 0.5, 1), c("blue", "white", "red")))
####.Unequal distance + HEG + k=4 + ARI
ha = rowAnnotation(df = data.frame(dat.features[513:1024,]),
col = list(Cluster_Distance=c("UDB1"="#9bc0ee","UDB2"="#70a5e7","UDB3"="#4489df","UDB4"="#236fce"),
Cluster_Size=c("ESA"="#c0ee9b","USB"="#6fce23"),
Library_Size=c("LS1"="#eec99b", "LS2"="#e7b270","LS3"="#df9a44","LS4"="#ce8223"),
Library_Scale= c("LC1"="#c99bee","LC2"="#b270e7","LC3"="#9a44df","LC4"="#8223ce"),
Dropout=c("DO1"="#eea09b","DO2"="#e77770","DO3"="#df4d44","DO4"="#ce2d23")))
Heatmap(adj.mat.fil[grepl('UDB',rownames(adj.mat.fil)),grepl("HEG",colnames(adj.mat.fil))][,c(1:15)], name="ARI",cluster_rows = FALSE, right_annotation = ha, show_row_names = FALSE, col = colorRamp2(c(0, 0.5, 1), c("blue", "white", "red")))
####.Unequal distance + HEG + k=4 + F-measure
ha = rowAnnotation(df = data.frame(dat.features[513:1024,]),
col = list(Cluster_Distance=c("UDB1"="#9bc0ee","UDB2"="#70a5e7","UDB3"="#4489df","UDB4"="#236fce"),
Cluster_Size=c("ESA"="#c0ee9b","USB"="#6fce23"),
Library_Size=c("LS1"="#eec99b", "LS2"="#e7b270","LS3"="#df9a44","LS4"="#ce8223"),
Library_Scale= c("LC1"="#c99bee","LC2"="#b270e7","LC3"="#9a44df","LC4"="#8223ce"),
Dropout=c("DO1"="#eea09b","DO2"="#e77770","DO3"="#df4d44","DO4"="#ce2d23")))
Heatmap(fm.mat.fil[grepl('UDB',rownames(fm.mat.fil)),grepl("HEG",colnames(fm.mat.fil))][,c(1:15)], name="F-measure",cluster_rows = FALSE, right_annotation = ha, show_row_names = FALSE, col = colorRamp2(c(0, 0.5, 1), c("blue", "white", "red")))
####.Unequal distance + HDG + k=4 + ARI
ha = rowAnnotation(df = data.frame(dat.features[513:1024,]),
col = list(Cluster_Distance=c("UDB1"="#9bc0ee","UDB2"="#70a5e7","UDB3"="#4489df","UDB4"="#236fce"),
Cluster_Size=c("ESA"="#c0ee9b","USB"="#6fce23"),
Library_Size=c("LS1"="#eec99b", "LS2"="#e7b270","LS3"="#df9a44","LS4"="#ce8223"),
Library_Scale= c("LC1"="#c99bee","LC2"="#b270e7","LC3"="#9a44df","LC4"="#8223ce"),
Dropout=c("DO1"="#eea09b","DO2"="#e77770","DO3"="#df4d44","DO4"="#ce2d23")))
Heatmap(adj.mat.fil[grepl('UDB',rownames(adj.mat.fil)),grepl("HDG",colnames(adj.mat.fil))][,c(1:15)], name="ARI",cluster_rows = FALSE, right_annotation = ha, show_row_names = FALSE, col = colorRamp2(c(0, 0.5, 1), c("blue", "white", "red")))
####.Unequal distance + HDG + k=4 + F-measure
ha = rowAnnotation(df = data.frame(dat.features[513:1024,]),
col = list(Cluster_Distance=c("UDB1"="#9bc0ee","UDB2"="#70a5e7","UDB3"="#4489df","UDB4"="#236fce"),
Cluster_Size=c("ESA"="#c0ee9b","USB"="#6fce23"),
Library_Size=c("LS1"="#eec99b", "LS2"="#e7b270","LS3"="#df9a44","LS4"="#ce8223"),
Library_Scale= c("LC1"="#c99bee","LC2"="#b270e7","LC3"="#9a44df","LC4"="#8223ce"),
Dropout=c("DO1"="#eea09b","DO2"="#e77770","DO3"="#df4d44","DO4"="#ce2d23")))
Heatmap(fm.mat.fil[grepl('UDB',rownames(fm.mat.fil)),grepl("HDG",colnames(fm.mat.fil))][,c(1:15)], name="F-measure",cluster_rows = FALSE, right_annotation = ha, show_row_names = FALSE, col = colorRamp2(c(0, 0.5, 1), c("blue", "white", "red")))
##. Local performance for clusters with smaller size F-measure can be used to compare the clustering result with truth for a subset of the cells. Here we used F-measure to evaluate the performance of those algorithms only for clusters that have closer distance to each other or have relatively smaller size.
####. Calculate f-measurefor the two clusters with smaller size
fm.smclus.mat <- matrix(0, nrow = 512, ncol = 225)
dats.us <- names(res)[grepl("USB", names(res))]
for(i in c(1:length(dats.us))){
name = dats.us[i]
res.mat <- res[[name]]
tru.vec <- tru[[name]]
fm.smclus.mat[i,] = apply(res.mat,2,function(x) fmeasure(tru.vec,x,c("Group3","Group4")))
}
colnames(fm.smclus.mat) <- colnames(res[[1]])
rownames(fm.smclus.mat) <- dats.us
####.Unequal Size + Unequal Distance + HVG + k=4 + F-measure for two small clusters
ha = rowAnnotation(df = data.frame(dat.features[dats.us[grepl("UDB", rownames(fm.smclus.mat))],]),
col = list(Cluster_Distance=c("UDB1"="#9bc0ee","UDB2"="#70a5e7","UDB3"="#4489df","UDB4"="#236fce"),
Cluster_Size=c("ESA"="#c0ee9b","USB"="#6fce23"),
Library_Size=c("LS1"="#eec99b", "LS2"="#e7b270","LS3"="#df9a44","LS4"="#ce8223"),
Library_Scale= c("LC1"="#c99bee","LC2"="#b270e7","LC3"="#9a44df","LC4"="#8223ce"),
Dropout=c("DO1"="#eea09b","DO2"="#e77770","DO3"="#df4d44","DO4"="#ce2d23")))
Heatmap(fm.smclus.mat[grepl('UDB',rownames(fm.smclus.mat)) ,grepl("HVG",colnames(fm.smclus.mat))][,c(1:15)], name="F-measure",cluster_rows = FALSE, right_annotation = ha, show_row_names = FALSE, col = colorRamp2(c(0, 0.5, 1), c("blue", "white", "red")))
####.Unequal Size + Equal Distance + HVG + k=4 + F-measure for two small clusters
ha = rowAnnotation(df = data.frame(dat.features[dats.us[grepl("EDA", rownames(fm.smclus.mat))],]),
col = list(Cluster_Distance=c("EDA1"="#9bc0ee","EDA2"="#70a5e7","EDA3"="#4489df","EDA4"="#236fce"),
Cluster_Size=c("ESA"="#c0ee9b","USB"="#6fce23"),
Library_Size=c("LS1"="#eec99b", "LS2"="#e7b270","LS3"="#df9a44","LS4"="#ce8223"),
Library_Scale= c("LC1"="#c99bee","LC2"="#b270e7","LC3"="#9a44df","LC4"="#8223ce"),
Dropout=c("DO1"="#eea09b","DO2"="#e77770","DO3"="#df4d44","DO4"="#ce2d23")))
Heatmap(fm.smclus.mat[grepl('EDA',rownames(fm.smclus.mat)) ,grepl("HVG",colnames(fm.smclus.mat))][,c(1:15)], name="F-measure",cluster_rows = FALSE, right_annotation = ha, show_row_names = FALSE, col = colorRamp2(c(0, 0.5, 1), c("blue", "white", "red")))
####. Calculate f-measurefor the two clusters with larger size
fm.lgclus.mat <- matrix(0, nrow = 512, ncol = 225)
for(i in c(1:length(dats.us))){
name = dats.us[i]
res.mat <- res[[name]]
tru.vec <- tru[[name]]
fm.lgclus.mat[i,] = apply(res.mat,2,function(x) fmeasure(tru.vec,x,c("Group1","Group2")))
}
colnames(fm.lgclus.mat) <- colnames(res[[1]])
rownames(fm.lgclus.mat) <- dats.us
####.Unequal Size + Unequal Distance + HVG + k=4 + F-measure for two large clusters
ha = rowAnnotation(df = data.frame(dat.features[dats.us[grepl("UDB", rownames(fm.lgclus.mat))],]),
col = list(Cluster_Distance=c("UDB1"="#9bc0ee","UDB2"="#70a5e7","UDB3"="#4489df","UDB4"="#236fce"),
Cluster_Size=c("ESA"="#c0ee9b","USB"="#6fce23"),
Library_Size=c("LS1"="#eec99b", "LS2"="#e7b270","LS3"="#df9a44","LS4"="#ce8223"),
Library_Scale= c("LC1"="#c99bee","LC2"="#b270e7","LC3"="#9a44df","LC4"="#8223ce"),
Dropout=c("DO1"="#eea09b","DO2"="#e77770","DO3"="#df4d44","DO4"="#ce2d23")))
Heatmap(fm.lgclus.mat[grepl('UDB',rownames(fm.lgclus.mat)) ,grepl("HVG",colnames(fm.lgclus.mat))][,c(1:15)], name="F-measure",cluster_rows = FALSE, right_annotation = ha, show_row_names = FALSE, col = colorRamp2(c(0, 0.5, 1), c("blue", "white", "red")))
####.Unequal Size + Unequal Distance + HVG + k=4 + F-measure for two large clusters
ha = rowAnnotation(df = data.frame(dat.features[dats.us[grepl("EDA", rownames(fm.lgclus.mat))],]),
col = list(Cluster_Distance=c("EDA1"="#9bc0ee","EDA2"="#70a5e7","EDA3"="#4489df","EDA4"="#236fce"),
Cluster_Size=c("ESA"="#c0ee9b","USB"="#6fce23"),
Library_Size=c("LS1"="#eec99b", "LS2"="#e7b270","LS3"="#df9a44","LS4"="#ce8223"),
Library_Scale= c("LC1"="#c99bee","LC2"="#b270e7","LC3"="#9a44df","LC4"="#8223ce"),
Dropout=c("DO1"="#eea09b","DO2"="#e77770","DO3"="#df4d44","DO4"="#ce2d23")))
Heatmap(fm.lgclus.mat[grepl('EDA',rownames(fm.lgclus.mat)) ,grepl("HVG",colnames(fm.lgclus.mat))][,c(1:15)], name="F-measure",cluster_rows = FALSE, right_annotation = ha, show_row_names = FALSE, col = colorRamp2(c(0, 0.5, 1), c("blue", "white", "red")))
##. Local performance for clusters that are closer to each other #### Calculate the f-measure for clusters that are cluster to each other (Group 3,4) or far from each other (Group 1, 2)
fm.undist.mat <- matrix(0, nrow = 512, ncol = 225)
dats.ud <- names(res)[grepl("UDB", names(res))]
for(i in c(1:length(dats.ud))){
name = dats.us[i]
res.mat <- res[[name]]
tru.vec <- tru[[name]]
fm.undist.mat[i,] = apply(res.mat,2,function(x) fmeasure(tru.vec,x,c("Group3","Group4")))
}
colnames(fm.undist.mat) <- colnames(res[[1]])
rownames(fm.undist.mat) <- dats.ud
fm.eqdist.mat <- matrix(0, nrow = 512, ncol = 225)
for(i in c(1:length(dats.ud))){
name = dats.us[i]
res.mat <- res[[name]]
tru.vec <- tru[[name]]
fm.eqdist.mat[i,] = apply(res.mat,2,function(x) fmeasure(tru.vec,x,c("Group1","Group2")))
}
colnames(fm.eqdist.mat) <- colnames(res[[1]])
rownames(fm.eqdist.mat) <- dats.ud
####.Equal Size + Unequal Distance + HVG + k=4 + F-measure for two closer clusters
ha = rowAnnotation(df = data.frame(dat.features[dats.ud[grepl("ESA", rownames(fm.undist.mat))],]),
col = list(Cluster_Distance=c("UDB1"="#9bc0ee","UDB2"="#70a5e7","UDB3"="#4489df","UDB4"="#236fce"),
Cluster_Size=c("ESA"="#c0ee9b","USB"="#6fce23"),
Library_Size=c("LS1"="#eec99b", "LS2"="#e7b270","LS3"="#df9a44","LS4"="#ce8223"),
Library_Scale= c("LC1"="#c99bee","LC2"="#b270e7","LC3"="#9a44df","LC4"="#8223ce"),
Dropout=c("DO1"="#eea09b","DO2"="#e77770","DO3"="#df4d44","DO4"="#ce2d23")))
Heatmap(fm.undist.mat[grepl('ESA',rownames(fm.undist.mat)) ,grepl("HVG",colnames(fm.undist.mat))][,c(1:15)], name="F-measure",cluster_rows = FALSE, right_annotation = ha, show_row_names = FALSE, col = colorRamp2(c(0, 0.5, 1), c("blue", "white", "red")))
####.Equal Size + Unequal Distance + HVG + k=4 + F-measure for two farer clusters
ha = rowAnnotation(df = data.frame(dat.features[dats.ud[grepl("ESA", rownames(fm.eqdist.mat))],]),
col = list(Cluster_Distance=c("UDB1"="#9bc0ee","UDB2"="#70a5e7","UDB3"="#4489df","UDB4"="#236fce"),
Cluster_Size=c("ESA"="#c0ee9b","USB"="#6fce23"),
Library_Size=c("LS1"="#eec99b", "LS2"="#e7b270","LS3"="#df9a44","LS4"="#ce8223"),
Library_Scale= c("LC1"="#c99bee","LC2"="#b270e7","LC3"="#9a44df","LC4"="#8223ce"),
Dropout=c("DO1"="#eea09b","DO2"="#e77770","DO3"="#df4d44","DO4"="#ce2d23")))
Heatmap(fm.eqdist.mat[grepl('ESA',rownames(fm.eqdist.mat)) ,grepl("HVG",colnames(fm.eqdist.mat))][,c(1:15)], name="F-measure",cluster_rows = FALSE, right_annotation = ha, show_row_names = FALSE, col = colorRamp2(c(0, 0.5, 1), c("blue", "white", "red")))
####.Unequal Size + Unequal Distance + HVG + k=4 + F-measure for two closer clusters
ha = rowAnnotation(df = data.frame(dat.features[dats.ud[grepl("USB", rownames(fm.undist.mat))],]),
col = list(Cluster_Distance=c("UDB1"="#9bc0ee","UDB2"="#70a5e7","UDB3"="#4489df","UDB4"="#236fce"),
Cluster_Size=c("ESA"="#c0ee9b","USB"="#6fce23"),
Library_Size=c("LS1"="#eec99b", "LS2"="#e7b270","LS3"="#df9a44","LS4"="#ce8223"),
Library_Scale= c("LC1"="#c99bee","LC2"="#b270e7","LC3"="#9a44df","LC4"="#8223ce"),
Dropout=c("DO1"="#eea09b","DO2"="#e77770","DO3"="#df4d44","DO4"="#ce2d23")))
Heatmap(fm.undist.mat[grepl('USB',rownames(fm.undist.mat)) ,grepl("HVG",colnames(fm.undist.mat))][,c(1:15)], name="F-measure",cluster_rows = FALSE, right_annotation = ha, show_row_names = FALSE, col = colorRamp2(c(0, 0.5, 1), c("blue", "white", "red")))
####.Unequal Size + Unequal Distance + HVG + k=4 + F-measure for two farer clusters
ha = rowAnnotation(df = data.frame(dat.features[dats.ud[grepl("USB", rownames(fm.eqdist.mat))],]),
col = list(Cluster_Distance=c("UDB1"="#9bc0ee","UDB2"="#70a5e7","UDB3"="#4489df","UDB4"="#236fce"),
Cluster_Size=c("ESA"="#c0ee9b","USB"="#6fce23"),
Library_Size=c("LS1"="#eec99b", "LS2"="#e7b270","LS3"="#df9a44","LS4"="#ce8223"),
Library_Scale= c("LC1"="#c99bee","LC2"="#b270e7","LC3"="#9a44df","LC4"="#8223ce"),
Dropout=c("DO1"="#eea09b","DO2"="#e77770","DO3"="#df4d44","DO4"="#ce2d23")))
Heatmap(fm.eqdist.mat[grepl('USB',rownames(fm.eqdist.mat)) ,grepl("HVG",colnames(fm.eqdist.mat))][,c(1:15)], name="F-measure",cluster_rows = FALSE, right_annotation = ha, show_row_names = FALSE, col = colorRamp2(c(0, 0.5, 1), c("blue", "white", "red")))
##. Maximum ARI for each data (Best performance for each method but may not necessarily have k=4)
gsms <- c("HVG","HDG", "HEG")
adj.bestp.mat <- matrix(0, nrow=1024, ncol=45)
adj.bestp.k.mat <- matrix(0,nrow=1024, ncol=45)
rownames(adj.bestp.mat) <- rownames(adj.mat.fil)
colnames(adj.bestp.mat) <- unique(unlist(lapply(strsplit(colnames(adj.mat.fil),"_"), function(x) paste(x[1],x[2],sep = "_"))))
rownames(adj.bestp.k.mat) = rownames(adj.bestp.mat)
colnames(adj.bestp.k.mat) = colnames(adj.bestp.mat)
clums <- unique(unlist(lapply(strsplit(colnames(adj.mat.fil),"_"), function(x) x[2])))
for(i in rownames(adj.mat.fil)){
for(j in colnames(adj.bestp.mat)){
tmp <- adj.mat.fil[i, grepl(j, colnames(adj.mat.fil))]
adj.bestp.mat[i,j] = max(tmp)
if(is.na(strsplit(names(which.max(tmp)),"_")[[1]][3])){
adj.bestp.k.mat[i,j] = 4
}else{
adj.bestp.k.mat[i,j] = as.numeric(strsplit(names(which.max(tmp)),"_")[[1]][3])
}
}
}
ha = HeatmapAnnotation(df = data.frame(Distance_Type=distype, dat.features),
col = list(Distance_Type=c("EDA"="#c6c4c4","UDB"="#7a7676"),
Cluster_Distance=c("EDA1"="#9bc0ee","EDA2"="#70a5e7","EDA3"="#4489df","EDA4"="#236fce","UDB1"="#9bc0ee","UDB2"="#70a5e7","UDB3"="#4489df","UDB4"="#236fce"),
Cluster_Size=c("ESA"="#c0ee9b","USB"="#6fce23"),
Library_Size=c("LS1"="#eec99b", "LS2"="#e7b270","LS3"="#df9a44","LS4"="#ce8223"),
Library_Scale= c("LC1"="#c99bee","LC2"="#b270e7","LC3"="#9a44df","LC4"="#8223ce"),
Dropout=c("DO1"="#eea09b","DO2"="#e77770","DO3"="#df4d44","DO4"="#ce2d23")))
#hm = rowAnnotation(df = data.frame(Gene_Selection=meth.gene, K=meth.k), col = list(Gene_Selection=c("HVG"="#23ce82", "HEG"="#5ae3a6", "HDG"="#9beec9"), K=c("2"="#236fce","3"="#2e7cdc","4"="#5a97e3","5"="#70a5e7","6"="#9bc0ee")))
Heatmap(t(adj.bestp.mat), name="ARI",cluster_rows = TRUE, cluster_columns = TRUE, top_annotation = ha, show_row_names = FALSE, show_column_names = FALSE, col = colorRamp2(c(0, 0.5, 1), c("blue", "white", "red")))
####. Equal distance (512) + HVG + k=4 + ARI + best performance
ha = rowAnnotation(df = data.frame(dat.features[1:512,]),
col = list(Cluster_Distance=c("EDA1"="#9bc0ee","EDA2"="#70a5e7","EDA3"="#4489df","EDA4"="#236fce"),
Cluster_Size=c("ESA"="#c0ee9b","USB"="#6fce23"),
Library_Size=c("LS1"="#eec99b", "LS2"="#e7b270","LS3"="#df9a44","LS4"="#ce8223"),
Library_Scale= c("LC1"="#c99bee","LC2"="#b270e7","LC3"="#9a44df","LC4"="#8223ce"),
Dropout=c("DO1"="#eea09b","DO2"="#e77770","DO3"="#df4d44","DO4"="#ce2d23")))
Heatmap(adj.bestp.mat[grepl('EDA',rownames(adj.bestp.mat)),grepl("HVG",colnames(adj.bestp.mat))], name="ARI",cluster_rows = FALSE, right_annotation = ha, show_row_names = FALSE, col = colorRamp2(c(0, 0.5, 1), c("blue", "white", "red")))
####. Equal distance (512) + HVG + k=4 + ARI + best performance
ha = rowAnnotation(df = data.frame(dat.features[1:512,]),
col = list(Cluster_Distance=c("EDA1"="#9bc0ee","EDA2"="#70a5e7","EDA3"="#4489df","EDA4"="#236fce"),
Cluster_Size=c("ESA"="#c0ee9b","USB"="#6fce23"),
Library_Size=c("LS1"="#eec99b", "LS2"="#e7b270","LS3"="#df9a44","LS4"="#ce8223"),
Library_Scale= c("LC1"="#c99bee","LC2"="#b270e7","LC3"="#9a44df","LC4"="#8223ce"),
Dropout=c("DO1"="#eea09b","DO2"="#e77770","DO3"="#df4d44","DO4"="#ce2d23")))
Heatmap(adj.bestp.k.mat[grepl('EDA',rownames(adj.bestp.k.mat)),grepl("HVG",colnames(adj.bestp.k.mat))], name="k",cluster_rows = FALSE, right_annotation = ha, show_row_names = FALSE, col = colorRamp2(c(2, 4, 6), c("blue", "grey", "red")))
####. Equal distance (512) + HVG + k=4 + ARI + best performance
ha = rowAnnotation(df = data.frame(dat.features[513:1024,]),
col = list(Cluster_Distance=c("UDB1"="#9bc0ee","UDB2"="#70a5e7","UDB3"="#4489df","UDB4"="#236fce"),
Cluster_Size=c("ESA"="#c0ee9b","USB"="#6fce23"),
Library_Size=c("LS1"="#eec99b", "LS2"="#e7b270","LS3"="#df9a44","LS4"="#ce8223"),
Library_Scale= c("LC1"="#c99bee","LC2"="#b270e7","LC3"="#9a44df","LC4"="#8223ce"),
Dropout=c("DO1"="#eea09b","DO2"="#e77770","DO3"="#df4d44","DO4"="#ce2d23")))
Heatmap(adj.bestp.mat[grepl('UDB',rownames(adj.bestp.mat)),grepl("HVG",colnames(adj.bestp.mat))], name="ARI",cluster_rows = FALSE, right_annotation = ha, show_row_names = FALSE, col = colorRamp2(c(0, 0.5, 1), c("blue", "white", "red")))
####. Equal distance (512) + HVG + k=4 + ARI + best performance
ha = rowAnnotation(df = data.frame(dat.features[513:1024,]),
col = list(Cluster_Distance=c("UDB1"="#9bc0ee","UDB2"="#70a5e7","UDB3"="#4489df","UDB4"="#236fce"),
Cluster_Size=c("ESA"="#c0ee9b","USB"="#6fce23"),
Library_Size=c("LS1"="#eec99b", "LS2"="#e7b270","LS3"="#df9a44","LS4"="#ce8223"),
Library_Scale= c("LC1"="#c99bee","LC2"="#b270e7","LC3"="#9a44df","LC4"="#8223ce"),
Dropout=c("DO1"="#eea09b","DO2"="#e77770","DO3"="#df4d44","DO4"="#ce2d23")))
Heatmap(adj.bestp.k.mat[grepl('UDB',rownames(adj.bestp.k.mat)),grepl("HVG",colnames(adj.bestp.k.mat))], name="k",cluster_rows = FALSE, right_annotation = ha, show_row_names = FALSE, col = colorRamp2(c(2, 4, 6), c("blue", "grey", "red")))